/*******************************************************************************
																				
	DESCRIPTION: 	This do-file calculates and plots the unemployment rate, 
					the separation rate and the job-finding rate (JFR).
	
*******************************************************************************/

* Start from an empty session before any data are loaded
clear all

* Prefix used in the names of all files written by this do-file
* (the globals ${data} and ${output} are not set here - presumably defined by a calling/master do-file)
global id_code "001_2"

********************************************************************************
* 0. Preparation of the LISA data used to obtain the number of employed
******************************************************************************* 
use LopNr_PersonNr Ar YrkStalln Alder SyssStat* using "${data}/001_02_Lisa_allYears.dta", clear

* Build a 0/1 employment indicator from the SyssStat* variables (i.e., employed in November).
* SyssStat  is available for year 1993 - 2003
* SyssStatJ is available for 2003 - 2011
* The sources are consulted in the priority order
*     SyssStatJ -> SyssStat -> SyssStat11 -> SyssStatG
* and a lower-priority source is only used when every higher-priority one is missing (==.).
* Observations without a recognised code in any source keep Employed == .

gen Employed = .

* 1) SyssStatJ: codes 1 and 7 -> employed, codes 5 and 6 -> not employed.
*    FIX: the second condition used to test SyssStat==6 instead of SyssStatJ==6. That set
*    Employed to 0 for everyone with SyssStat==6 even when SyssStatJ indicated employment,
*    and left SyssStatJ==6 unclassified.
replace Employed = 1 if inlist(SyssStatJ, 1, 7)
replace Employed = 0 if inlist(SyssStatJ, 5, 6)

* 2) SyssStat, only where SyssStatJ is missing
replace Employed = 1 if SyssStatJ==. & SyssStat==1
replace Employed = 0 if SyssStatJ==. & inlist(SyssStat, 5, 6)

* 3) SyssStat11, only where SyssStatJ and SyssStat are missing
replace Employed = 1 if SyssStatJ==. & SyssStat==. & inlist(SyssStat11, 1, 7)
replace Employed = 0 if SyssStatJ==. & SyssStat==. & inlist(SyssStat11, 5, 6)

* 4) SyssStatG, only where all three sources above are missing
replace Employed = 1 if SyssStatJ==. & SyssStat==. & SyssStat11==. & SyssStatG==1
replace Employed = 0 if SyssStatJ==. & SyssStat==. & SyssStat11==. & inlist(SyssStatG, 5, 6)

* NOTE(review): code 7 is mapped to "employed" for SyssStatJ and SyssStat11 but is not
* mapped for SyssStat or SyssStatG - confirm that this is intended.

* The raw status variables are no longer needed
drop SyssStat*

label var Employed "employment status based on SyssStat"
label define empl1 0 "Not employed" 1 "Employed"
label values Employed empl1

* Rename in one step: Ar -> year, Alder -> age, YrkStalln -> emplStatu
rename (Ar Alder YrkStalln) (year age emplStatu)

* Employment status based on YrkStalln (i.e., employed at any time in the year):
* make the variable numeric, then recode every value from 1 to 5 to a single 1
destring emplStatu, replace
replace emplStatu = 1 if inrange(emplStatu, 1, 5)

label var emplStatu "employment status based on YrkStalln"
label define empl2 0 "No taxable income" 1 "Any taxable income"
label values emplStatu empl2

* Person-year file reused by sections A1 and B1
save "${data}/${id_code}_EmploymentData.dta", replace

********************************************************************************
* A1: Obtain the number of employed and not employed by year
*******************************************************************************
use "${data}/${id_code}_EmploymentData.dta", clear

* Restrict to the working age population (15 to 74 years old)
keep if inrange(age, 15, 74)

* Diagnostic only: report whether a person appears more than once in a year
duplicates report Lop* year // no duplicates

* Yearly totals: all individuals, and employed according to each definition
bysort year: gen total = _N
egen nEmployedYS = total(emplStatu), by(year)
egen nEmployedSS = total(Employed), by(year)
label var nEmployedYS "yearly employed based on YrkStalln"
label var nEmployedSS "yearly employed based on SyssStat"

* Reduce to a single row per year (all variables kept below are constant within year)
bysort year: keep if _n == 1

* Employment shares relative to all individuals in the year
gen shareEmployedYS = nEmployedYS/total
gen shareEmployedSS = nEmployedSS/total
label var shareEmployedYS "share employed based on YrkStalln"
label var shareEmployedSS "share employed based on SyssStat"

* Individual-level variables carry no meaning in the yearly file
drop Lop* age emplStatu Employed

save "${data}/${id_code}_Employed.dta", replace

********************************************************************************
* B1: Obtain the number of unemployed by year
*******************************************************************************
* Merge unemployment data with the age variable (matched on LopNr_PersonNr and year);
* only spells that find a match in the employment data are kept
use LopNr_PersonNr year startU trueEnd emplAft3M_0M_In emplAft6M_0M_In using "${data}/001_1_UnemploymentSpells.dta", clear

merge m:1 LopNr_PersonNr year using "${data}/${id_code}_EmploymentData.dta", keepusing(age) keep(match) nogenerate

* Only keep individuals between the age of 15 and 74
* NOTE(review): age is taken from the `year' recorded on the spell row, whereas the stock
* below is assigned to yearUnemployed; for spells that span several calendar years the two
* can differ - confirm that the age restriction is meant to work this way.
keep if age>=15 & age<=74

* For every year, keep the spells that are ongoing on November 1: started on or before
* that date and either ending after it or having no recorded end date.
* Each yearly slice goes to a Stata tempfile, so no scratch files are written to ${data}
* and nothing is left behind if the run is interrupted.
forvalues y = 1992/2017 {
	preserve
	local nov1 = mdy(11, 1, `y')
	keep if startU <= `nov1' & (trueEnd > `nov1' | trueEnd == .)
	gen yearUnemployed = `y'
	tempfile stock`y'
	save `stock`y''
	restore
}

* Stack the yearly slices in chronological order
use `stock1992', clear
forvalues y = 1993/2017 {
	append using `stock`y''
}
save "${data}/${id_code}_Unemployed_allyears.dta", replace

duplicates report // no duplicates
duplicates report Lop* yearUnemployed // there are duplicates - surplus of 7K
* the duplicates seem to be for individuals that start the unemployment on different dates
* but a part of the spell overlaps, they constitute 0.1 % of the observations
* we drop one of the spells that belong to the duplicate pair

duplicates drop Lop* yearUnemployed, force // 7 674 observations dropped

* Count the unemployed per year and keep one row per year
bys yearUnemployed: gen nUnemployed=_N
drop year
duplicates drop yearUnemployed, force
rename yearUnemployed year
keep year nUnemployed
save "${data}/${id_code}_Unemployment_N.dta", replace

********************************************************************************
* B2: Obtain the unemployment rate by year
*******************************************************************************

* Merge data on unemployed and employed; keep only years present in both files
use "${data}/${id_code}_Unemployment_N.dta", clear
merge 1:1 year using "${data}/${id_code}_Employed.dta", keep(match) nogenerate

* Merge with official statistics; years that exist only in the OECD file are discarded.
* FIX: the path used a backslash separator ("$data\..."), which only resolves on Windows
* and was inconsistent with every other path in this do-file. A forward slash works on
* all platforms.
merge 1:1 year using "${data}/Unemployment_OECD.dta", keep(master match) nogenerate

* Calculate unemployment rate (in percent): unemployed / (employed + unemployed),
* once per employment definition (YS = YrkStalln-based, SS = SyssStat-based)
gen shareUnempYS = nUnemployed/(nEmployedYS + nUnemployed)*100
gen shareUnempSS = nUnemployed/(nEmployedSS + nUnemployed)*100

save "${data}/${id_code}_Employed_Unemployed.dta", replace

********************************************************************************
* B3: Plot the unemployment rate by year
*******************************************************************************

use "${data}/${id_code}_Employed_Unemployed.dta", clear

* Years included in the figure
local plotYears "inrange(year, 1992, 2016)"

* Unemployment rate constructed from the SyssStat-based counts vs. the OECD series
twoway ///
	(connected shareUnempSS   year if `plotYears', color(ebblue))      ///
	(connected UnempRate_OECD year if `plotYears', color(orange_red)), ///
	xtitle("") ///
	ytitle("Unemployment rate (%)") ///
	xlabel(1992(4)2017) xscale(titlegap(1)) ///
	ylabel(0(2)12, angle(0)) yscale(titlegap(3)) ///
	legend(cols(2) order(1 "Constructed from LISA" 2 "Source: OECD") ///
		 symxsize(*0.6) subtitle(" ", size(minuscule)) ///
		 title("Unemployment Rate:", size(medsmall) color(black))) ///
	graphregion(color(white))
graph export "${output}/${id_code}_Unemp_rate_SSvsOECD.pdf", as(pdf) replace

